Tree Data

Author

Paul S

Published

June 6, 2024

Code
devtools::install_github("dmurdoch/leaflet@crosstalk4")
library(tidyverse)
library(sf)
library(leaflet)
library(stringr)
library(knitr)
library(reactablefmtr)
library(plotly)
library(htmltools)
library(crosstalk)
library(patchwork)
library(ggtext)
library(DT)
library(RColorBrewer)
library(showtext)

agg_df <- readRDS('../r_objects/agg_df.Rdata')
sa2_sf <- readRDS('../r_objects/sa2_sf.Rdata') %>%
  #rename(sa2_code_2021 = "SA2_CODE21") %>%
  st_set_geometry('geometry')

sal_sf <- readRDS('../r_objects/sal_sf.Rdata') %>%
  st_set_geometry('geometry')

lga_sf <- readRDS('../r_objects/lga_sf.Rdata') %>%
  st_set_geometry('geometry') %>%
  rename( "lga_name_2022" = LGA_NAME22)

source('../r/functions.r')
source('../r/mapping_functions.r')
source('../themes/theme.r')
Code
grouping = 'SAL_NAME21'
grouping_sf = sal_sf
pretty_group_name = 'Suburb'


sf_use_s2(FALSE)

melbourne <- st_sfc(st_point(c(144.963115,-37.814175)), crs = 7844)

# random_suburb <- grouping_sf[1:2,] %>% st_centroid() %>% select('centroid') %>% st_as_sfc()
# 
# p = c(melbourne, random_suburb)
# 
# lwgeom::st_geod_azimuth(p,)
# 
# random_suburb <- grouping_sf[1:1,] %>% st_centroid() %>% st_cast('POINT')
# 
# rs <- random_suburb %>%
#   mutate(bearing = c(lwgeom::st_geod_azimuth(.), units::set_units(NA, 'degrees')))

random_suburb <- grouping_sf %>% select('SAL_NAME21', 'geometry', 'distance') %>% st_centroid() %>%
  rowwise() %>%
  mutate(lat = st_coordinates(geometry)[1], lng = st_coordinates(geometry)[2])

# 
# point <- st_sfc(st_point(c(random_suburb$lat[1],random_suburb$lng[1])), crs = 7844)
# g1 <- st_sfc(c( melbourne , point ))

# lwgeom::st_geod_azimuth(g1)


# g4 <- g3 %>% mutate(direction = case_when(
#   bearing > 315 || bearing < 45 ~ "North",
#   bearing > 45 && bearing < 135 ~ "East",
#   bearing > 135 && bearing < 225 ~ "South",
#   bearing > 225 && bearing < 315 ~ "West"
# ))


directions <- random_suburb %>%
  rowwise() %>%
  mutate(pt = st_sfc(st_point(c(lat,lng)), crs = 7844)) %>%
  mutate(bearing = lwgeom::st_geod_azimuth( st_sfc(c(melbourne, pt)) )) %>%
  mutate(bearing = as.numeric(units::set_units(bearing, "degrees")) )%>%
  mutate(bearing = ifelse(bearing < 0, 360 + bearing, bearing)) %>%
  mutate(direction = case_when(
    bearing > 0 && bearing < 90 ~ "North East",
    bearing > 90 && bearing < 180 ~ "South East",
    bearing > 180 && bearing < 270 ~ "South West",
    bearing > 270 && bearing < 360 ~ "North West"
  )) %>%
  rowwise() %>%
  mutate(direction = ifelse(distance < 7500, "Inner", direction)) %>%
  select(SAL_NAME21, direction) %>%
  st_drop_geometry()

Tree coverage

Note: to see how the data is constructed, see the Documentation.

Insert Lit Review here

Code
trees_grouped <- coverage(group = grouping) %>%
  left_join(grouping_sf, by = (grouping)) %>%
  st_set_geometry('geometry') %>%
  dplyr::select(grouping, tree_percentage,distance, geometry) %>%
  rowwise() %>%
  mutate(tree_percentage = round(tree_percentage, 3)) %>%
  mutate(distance = distance / 1000) %>%
  as('Spatial')

#bar chart
sd_map = SharedData$new(trees_grouped)
sd_df = SharedData$new(as.data.frame(trees_grouped@data), group = sd_map$groupName())

  #mapCoverage(trees_per_sa2_cw, trees_per_sa2_cw$data()$tree_percentage , 'Greens', 'Tree Coverage (%)')

  treePal <- colorNumeric(palette = 'Greens', domain = as.data.frame(sd_df$data())$tree_percentage)
    
  leaflet(sd_map) %>%
    setView(lng = 144.963115, lat = -37.814175, zoom = 10) %>%
    addProviderTiles('CartoDB.Positron') %>%
    addPolygons(fillColor = ~treePal(tree_percentage),
                fillOpacity = 0.5,
                opacity = 0)  %>%
    addLegend(position = "bottomright",
              pal = treePal,
              values = sd_df$data()$tree_percentage,
              title = 'title')
Code
  filter_slider("distance", "Distance to CBD", sd_df, ~distance, min = 0, max = max(sd_df$data()$distance) + 1, width = '75%')
Code
  extra_cols = list()
  extra_cols[[grouping]] <- colDef(name = pretty_group_name, filterable = T)
         
  reactable(sd_df,
  defaultSorted = list('tree_percentage' = "desc"),
  pagination = TRUE,
  theme = sandstone(),
  columns = c(list(
    #grouping = colDef(filterable = TRUE),
    tree_percentage = colDef(
      name = 'Tree Coverage (%)',
      cell = data_bars(trees_grouped,
                       text_position = "above",
                       fill_color = c("#EDF8E9", "#006D2C"),
                       #fill_gradient = TRUE,
                       #fill_color_ref = viridi(5),
                       background = "lightgrey",
                       max_value = 100,
                       brighten_text = FALSE,
                       bar_height = 20
                       )
    ),
    distance = colDef(show = FALSE)
  ),extra_cols)
)

Coverage of streets, parks and residential areas

The plot below is a stacked bar chart which displays both the total coverage of streets, parks and residential areas for each LGA, as well as displaying how the total coverage is split between its three components.

Code
grouping = 'lga_name_2022'
grouping_sf = lga_sf
pretty_group_name = "Local Government Area"

create_combined_df <- function(grouping, grouping_sf) {
  street_trees_grouped <- coverage(exp = expression(zone_short == 'roads'), type = 'street', group = grouping) %>%
  rename("street_tree_coverage" = coverage, "total_street_area" = total_area )

  reserve_trees_grouped <- coverage(exp = expression(feature_type == 'reserve'), type = 'reserve', group = grouping)  %>%
    rename("reserve_tree_coverage" = coverage, "total_reserve_area" = total_area ) 
  
  resi_trees_grouped <- coverage(exp = expression(zoning_permits_housing == 'Housing permitted'), type = 'residential', group = grouping)  %>%
    rename("residential_tree_coverage" = coverage, "total_residebtial_area" = total_area )
  
  #combine into one and clean
  combined_df <- street_trees_grouped %>%
    left_join(reserve_trees_grouped, by = grouping) %>%
    left_join(resi_trees_grouped, by = grouping) %>%
    rowwise() %>%
    mutate(total_coverage_area = sum(across(ends_with('_coverage')), na.rm = T), total_area =  sum(across(ends_with('_area')), na.rm = T)) %>%
    mutate(total_coverage_pc = total_coverage_area / total_area ) %>%
    mutate( weighted_street_pc = (street_tree_coverage / total_coverage_area) * total_coverage_pc,
            weighted_reserve_pc = (reserve_tree_coverage / total_coverage_area) * total_coverage_pc,
            weighted_resi_pc = (residential_tree_coverage / total_coverage_area) * total_coverage_pc
          ) %>%
    dplyr::select(c(grouping,'street_percentage', 'reserve_percentage', 'residential_percentage', 'weighted_street_pc', 'weighted_reserve_pc', 'weighted_resi_pc', 'total_coverage_pc')) %>%
    left_join(grouping_sf %>% st_drop_geometry() %>% select(grouping, distance), by = grouping) %>%
    rowwise() %>%
    # mutate(distance = distance_map[as.character(as.name(grouping))]) %>%
    mutate(distance = distance / 1000) %>%
    ungroup() %>%
    as.data.frame()
    # mutate(across(where(is.numeric), ~ round(.x, 1))) %>%
    # mutate(across(ends_with('percentage'), ~ ifelse(is.na(.x), 0, .x) )) %>%
    
}

combined_df = create_combined_df(grouping, grouping_sf)


sd_combined = SharedData$new((combined_df))

GGplot Test

Code
combined_longer <- pivot_longer(combined_df,
                                cols = c('weighted_street_pc', 'weighted_reserve_pc', 'weighted_resi_pc'),
                                names_to = 'weights',
                                values_to = "test")

stacked_chart <- ggplot(combined_longer, aes(fill=weights, y = test, x= reorder(!!as.name(grouping), test) )) + 
    geom_bar(position="stack", stat="identity") +
  ylab('Total tree coverage (%)') +
  xlab (pretty_group_name) +
  labs( fill = 'Land type') +
  scale_x_discrete(guide = guide_axis(angle = 0)) +
  scale_fill_manual(labels = c('a','b','c'), values = c('#7CAE00', 'tomato', '#619CFF') ) +
  coord_flip() +
  theme_minimal()


p1 <- plotly_build(stacked_chart)
p1$x$data[[1]]$name = "Reserves"
p1$x$data[[2]]$name = "Residential"
p1$x$data[[3]]$name = "Streets"

p1
Code
grouping = 'SAL_NAME21'
grouping_sf = sal_sf
pretty_group_name = 'Suburb'

slider <- filter_slider("distance", "Distance to CBD", sd_combined, ~distance, min = 0, max = max(sd_combined$data()$distance), width = '75%')
     
#weird bubble plot  
bubble <- reactable(sd_combined,
          defaultSorted = list('street_percentage' = "desc"),
          defaultColDef = colDef(
  cell = bubble_grid(combined_df, shape = 'circles', colors = c("#EDF8E9", "#006D2C")),
  align = 'center',
  vAlign = 'center'),
  columns = c(list(distance = colDef(show = F),
                   weighted_reserve_pc = colDef(show = F),
                   weighted_street_pc = colDef(show = F),
                   weighted_resi_pc = colDef(show = F),
                   total_coverage_area = colDef(show = F),
                   total_coverage_pc = colDef(show = F),
                 street_percentage = colDef(name = 'Streets'),
                 reserve_percentage = colDef(name = 'Parks'),
                 residential_percentage = colDef(name = 'Residential')),extra_cols),
  theme = sandstone(),
  defaultSortOrder = 'desc')

Interactions with Missing Middle

YM recently released Housing Targets, a data-driven model which allocates housing targets by council.

This page calculates how much tree cover might be lost in the upzoning of residential lots, and then calculating how to offset this loss by increasing coverage of street trees

Code
mm_lgas <- c('Brimbank', 'Merri-bek', 'Banyule', 'Darebin', 'Yarra', 'Moonee Valley', 'Manningham', 'Maribyrnong', 'Melbourne', 'Hobsons Bay', 'Port Phillip', 'Boroondara', 'Stonnington', 'Glen Eira', 'Bayside', 'Monash', 'Whitehorse', 'Maroondah', 'Manningham', 'Kingston')


years <- 10

targets <- read.csv('../council_targets.csv') [,1:2] %>%
  as.data.frame() %>%
  mutate(Target = sub(',', '', Target))

target_df <- data.frame()

for(lga in targets$LGA) {
  home_target <- as.numeric((targets %>% filter(LGA == lga))$Target) * years
 
  target_df <<- rbind(target_df, c( home_target ,lga_targets(lga, home_target)) ) 
}

colnames(target_df) = c('home_target','LGA', 'lost_coverage', 'initial_coverage', 'final_coverage')

target_df <- target_df %>%
  mutate(across(c('initial_coverage', 'final_coverage', 'lost_coverage'), ~ round(as.numeric(.x) / (10^4), 1) )) %>%
  rowwise() %>%
  mutate(percentage_point_change = as.numeric(final_coverage) - as.numeric(initial_coverage),
                                  percentage_change = ((as.numeric(final_coverage)/as.numeric(initial_coverage))-1)*100 ) %>%
  arrange(-percentage_change) %>%
  mutate(percentage_change = round(percentage_change, 3)) %>%
  dplyr::relocate(home_target, .after = LGA)

target_df %>%
  reactable(theme = sandstone(),
            pagination = F,
            columns = list(
              percentage_point_change = colDef(show = F),
              initial_coverage = colDef(name = "Coverage After Targets (Ha)"),
              lost_coverage = colDef(name = "Lost Coverage (Ha)"),
              final_coverage = colDef(name = "Public Coverage To Offset Density  (Ha)"),
              percentage_change = colDef(name = "Percent Growth From Current Public Coverage"),
              home_target = colDef(name = paste0(years,'yr Home Target'))
            ))

Zoning

This section breaks down canopy coverage by zone type. As expected, low density residential and civic land (parks) top the list. It is tempting to note that Greenfield is below industrial, however, Greenfield by definition has not had enough time for planted trees to grow large enough for canopy cover.

Code
coverage(exp = expression(!(zone_short %in% c('roads','other'))),group = 'zone_short') %>%
  
  reactable(
    defaultSorted = list('tree_percentage' = "desc"),
    pagination = F,
    theme = sandstone(),
    columns = list(
    tree_percentage = colDef(
      name = 'Coverage %',
      cell = data_bars(.,
      text_position = "above",
                       fill_color = c("#EDF8E9", "#006D2C"),
                       #fill_gradient = TRUE,
                       #fill_color_ref = viridi(5),
                       background = "lightgrey",
                       max_value = 100,
                       brighten_text = FALSE,
                       bar_height = 20
    )),
    coverage = colDef(show = F),
    total_area = colDef(show = F),
    zone_short = colDef(name = 'Zone type')
  ))

Regressions

This section shows the interaction between public tree coverage and the cost of living in an area, depicted by both house prices and median rents. Public tree coverage is defined as:

\[ \text{Public tree coverage} = \frac{\text{area of streets covered by trees + area of parks covered by trees}}{\text{area of streets + area of parks}} \]

Data Note: Rents are using ABS data, which is only available in ranges. Therefore, median rents are defined as whatever bin the median observation falls into. Also note that the Census was in 2021, and therefore rents may be impacted by COVID, and may have changed since. House prices are from Housing Victoria, and are an average of the last 5 quarters median prices.

TODO: Check Public Definitions

Code
lot_sizes <- agg_df %>%
  filter(zoning_permits_housing == 'Housing permitted') %>%
  group_by(SAL_NAME21) %>%
  summarise(med_lot = median(lot_size, na.rm = TRUE), n = n()) %>%
  rowwise()

rents = read_csv('../data/rents_by_sal.csv') %>%
  manipulateRents() %>%
  rowwise() %>%
  mutate(median_band = gsub('\\$', '', median_band)) 


public_coverage <- coverage(exp = expression(feature_preventing_development == TRUE | zone_short == 'roads'), type = 'public', group = grouping) %>% select('SAL_NAME21', 'public_percentage')

house_price_data = readxl::read_xls('../data/Median-House-Price-3rd-Quarter.xls', skip = 1)[-c(1:5),1:6] 

combined_df = create_combined_df(grouping, grouping_sf)

house_price_data = house_price_data %>%
  rowwise() %>%
  mutate(avg = mean(as.numeric(
    c(`Jul - Sep 2022...2`,
      `Oct - Dec 2022`,
      `Jan - Mar 2023`,
      `Apr - Jun 2023...5`,
      `Jul - Sep 2023...6`)), na.rm = TRUE)) %>% 
  select(SUBURB, avg,) %>%
  rename(SAL_NAME21 = SUBURB) %>%
  mutate(SAL_NAME21 = str_to_title(SAL_NAME21)) %>%
  left_join(combined_df, by = grouping) %>%
  filter(!is.na(distance), !is.na(avg)) %>%
  rename(average_house_price = avg) %>%
  mutate(average_house_price = average_house_price / 10^6) %>%
  left_join(lot_sizes, by = 'SAL_NAME21') %>%
  left_join(rents, by = 'SAL_NAME21') %>%
  left_join(public_coverage, by = 'SAL_NAME21') %>%
  adf() %>%
  mutate(median_band = as.factor(median_band)) %>%
  mutate(within15 = ifelse(distance < 15, T, F))

#house_price_data = house_price_data %>% left_join(directions, by = 'SAL_NAME21')

#missing_suburbs = setdiff(combined_df$SAL_NAME21, house_price_data$SAL_NAME21)
#wtf

house_interactable = SharedData$new(house_price_data)

filter_slider("distance", "Distance to CBD", house_interactable, max = 30, ~distance, width = '75%')
Code
# price_street_plot <- ggplot(house_interactable, mapping = aes(x = average_house_price, y = street_percentage, text = SAL_NAME21), axe) + xlab('Average House Price ($m)') + ylab('Street Tree Coverage (%)') + geom_point(color = "#006D2C") + theme_minimal()
# 
# price_residential_plot <- ggplot(house_interactable, mapping = aes(x = average_house_price, y = residential_percentage, text = SAL_NAME21), axe) + xlab('Average House Price ($m)') + ylab('Residential Area Tree Coverage (%)') + geom_point(color = "#006D2C") + theme_minimal()

price_public_plot <- ggplot(house_interactable, mapping = aes(x = average_house_price, y = public_percentage, text = SAL_NAME21, fill = within15, stroke = NA), axe) + xlab('Average Detached House Price ($m)') + ylab(' Tree Coverage of Public Areas (%)') + geom_point() + labs(caption = "Source: Housing Victoria") +  theme_minimal()

rents_public_plot <- (ggplot(house_interactable, mapping = aes(x = median_band, y = public_percentage, text = SAL_NAME21)) + geom_boxplot(color = "#006D2C") + xlab('Median Rent (2021 dollars)') + ylab("Tree Coverage of Public Areas (%)") +  theme_minimal() + labs(caption = "Source: 2021 Census (ABS)") +  theme(axis.text.x = element_text(angle = 45)) ) 

#bscols(widths = c(4,4), ggplotly(price_street_plot), ggplotly(price_residential_plot))
#subplot(ggplotly(price_street_plot),ggplotly(price_residential_plot), titleY = T, titleX = T)

ggplotly(price_public_plot)

The above figure displays the relationship between house prices and public tree coverage. There is a tangible correlation between the two variables as suburbs with higher house prices tend to have more public tree coverage. This is not a statement of causation, merely correlation.

The data is further split into two groups: suburbs within 15 kilometres of the city, and suburbs outside the 15km range. This split reveals two distinct spatial patterns.

For ‘inner’ suburbs, the relationship is much steeper. For outer suburbs, the slope of a best fit line is much shallower.

Code
ggplotly(rents_public_plot)
Code
# sal_w_rents <- sal_sf %>%
#   left_join(rents, by = 'SAL_NAME21') %>%
#   arrange(median_band) %>%
#   mutate(median_band = as.factor(median_band))
# 
# cols = brewer.pal(4, 'RdBu' )
# cols = colorRampPalette(cols)(18)
# rentPal =  colorFactor( cols  , domain = sal_w_rents$median_band)

# sal_w_rents %>%
#   leaflet() %>%
#   addProviderTiles('CartoDB.Positron') %>%
#   addPolygons(fillColor = ~rentPal(median_band), fillOpacity = 1, opacity = 0) %>%
#   addLegend(position = "bottomright",
#               pal = rentPal,
#               values =sal_w_rents$median_band,
#               title = 'title')



#plot( house_price_data$median_band, house_price_data$public_percentage)


#colours <- brewer.pal(5, "Set1")
#names(colours) <- levels(house_price_data$direction)

#ggplot(house_price_data, mapping = aes(x = average_house_price, y = residential_percentage, color = direction, text = SAL_NAME21), axe) + xlab('Average House Price ($m)') + ylab('Residential Area Tree Coverage (%)') + geom_point() + scale_color_manual(name = "direction", values = colours) + theme_minimal()

The graph above displays the relationship between median rents and public tree coverage. It is not possible to fit a line of best fit, given the categorical nature of rents.

A slight relationship is clear, as median public coverage is increasing with median rents until median rents hit 500. After the ‘500-524’ band, public coverage drops and the relationship no longer holds, although this is mostly due to expensive suburbs located close to the CBD such as Carlton and Fitzroy.

Code
lot_df = house_price_data %>% filter(med_lot < 2000)

lotsize_street_plot <- ggplot(lot_df, mapping = aes(x = med_lot, y = street_percentage, text = SAL_NAME21), axe) + xlab('Average Lot Size') + ylab('Street Tree Coverage (%)') + geom_point(color = "#006D2C") + theme_minimal()

lotsize_residential_plot <- ggplot(lot_df, mapping = aes(x = med_lot, y = residential_percentage, text = SAL_NAME21), axe) + xlab('Average Lot Size') + ylab('Residential Tree Coverage (%)') + geom_point(color = "#006D2C") + theme_minimal()


#ggplotly(lotsize_street_plot)
#ggplotly(lotsize_residential_plot)

#summary(lm(residential_percentage ~ med_lot, house_price_data)) 

#summary(lm(street_percentage ~ med_lot, house_price_data))


#summary(lm(public_percentage ~ med_lot + average_house_price + distance, lot_df))

International Comparisons

The plot below shows how Melbourne compares to other international examples. It displays how coverage changes as distance from the centre increases. Vienna is a clear winner, followed by Sydney and Paris. It is also worth noting that only a 20km by 20km box around the centre was computed for each city, hence these statistics focus on the inner core of cities.

Note: this is using LOESS smoothing, as the scatter or line plots have significant variance. Grey bands denote a 95% confidence interval.

Code
cities = c('paris', 'vienna', 'london', 'newyork', 'melbourne', 'sydney', 'tokyo' )

big_df <- data.frame()

walk(cities, loadFiles)

# big_df = big_df %>%
#   filter(med_cov != 0, mean_cov != 0)

# big_df = big_df %>%
#   mutate(med_cov = 100 * med_cov, mean_cov = 100 * mean_cov)

big_df = big_df %>% as.data.frame()

big_df = big_df %>% filter(cov < 1)

int_comp <- ggplot(big_df, mapping = aes(distance, cov, colour = city, group = city)) +
  xlab("Distance to Centre (m)") +
  ylab("Mean Tree Coverage (%)") +
  labs(title = "Tree Coverage of Major Cities") +
  geom_smooth(method = 'loess') +
  theme_minimal()



ggplotly(int_comp)
`geom_smooth()` using formula = 'y ~ x'
Code
# mean_plot <- ggplot(big_df, mapping = aes(distance, mean_cov, colour = city, group = city)) +
#   xlab("Distance to Centre (m)") +
#   ylab("Mean Tree Coverage (%)") +
#   labs(title = "Tree Coverage of Major Cities") +
#   geom_smooth(method = 'loess') +
#   theme_minimal()
# 
# median_plot <- ggplot(big_df, mapping = aes(distance, med_cov, colour = city, group = city)) +
#   xlab("Distance to Centre (m)") +
#   ylab("Median Tree Coverage (%)") +
#   labs(title = "Tree Coverage of Major Cities") +
#   geom_smooth(method = 'loess') +
#   theme_minimal()
# 
# ggplotly(mean_plot)
# 
# ggplotly(median_plot)